library(dslabs)
library(readr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.5 ✔ dplyr 1.0.8
## ✔ tibble 3.1.6 ✔ stringr 1.4.0
## ✔ tidyr 1.2.0 ✔ forcats 0.5.1
## ✔ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(here)
## here() starts at C:/Users/Bas/Desktop/School/Programmeren/datascience/portfolio
library(DBI)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
# Laden van flu data en de eerste 10 rows skippen
flu_data <- read_csv(here("data","flu_data.csv"), skip = 10)
## Rows: 659 Columns: 30
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (29): Argentina, Australia, Austria, Belgium, Bolivia, Brazil, Bulgaria...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Laden van denque data en de eerste 10 rows skippen
dengue_data <- read_csv(here("data","dengue_data.csv"), skip = 10)
## Rows: 659 Columns: 11
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (10): Argentina, Bolivia, Brazil, India, Indonesia, Mexico, Philippines...
## date (1): Date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Laden van gampinder in gampinder (niet nuttig).
gapminder <- gapminder
# Date alleen per jaar laten zien
#flu_data$Date <- format(flu_data$Date, "%Y")
# Date alleen per jaar laten zien
#dengue_data$Date <- format(dengue_data$Date, "%Y")
# gapminder zelfde colnaam geven
gapminder_tidy <- gapminder %>% rename(Date = year)
# flu_data tidy maken
flu_data_tidy <- pivot_longer(data = flu_data, cols = -c("Date"), names_to = "country", values_to = "cases")
# en factor van country maken
flu_data_tidy$country <- as.factor(flu_data_tidy$country)
# dengue_data tidy maken
dengue_data_tidy <- pivot_longer(data = dengue_data, cols = -c("Date"), names_to = "country", values_to = "activity")
# en factor van country maken
dengue_data_tidy$country <- as.factor(dengue_data_tidy$country)
# de duplicate rows verwijderen
flu_data_tidy <- unique(flu_data_tidy[,1:3])
# de duplicate rows verwijderen
dengue_data_tidy <- unique(dengue_data_tidy[,1:3])
# Oplsaan als CSV bestand
write_csv(flu_data_tidy, path = here("data","flu_data_tidy.csv"))
## Warning: The `path` argument of `write_csv()` is deprecated as of readr 1.4.0.
## Please use the `file` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# Oplsaan als CSV bestand
write_csv(dengue_data_tidy, path = here("data","dengue_data_tidy.csv"))
# Oplsaan als CSV bestand
write_csv(gapminder_tidy, path = here("data","gapminder_tidy.csv"))
# opslaan als rds bestand
write_rds(flu_data_tidy, path = here("data","flu_data_tidy.rds"))
## Warning: The `path` argument of `write_rds()` is deprecated as of readr 1.4.0.
## Please use the `file` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.
# opslaan als rds bestand
write_rds(dengue_data_tidy, path = here("data","dengue_data_tidy.rds"))
# opslaan als rds bestand
write_rds(gapminder_tidy, path = here("data","gapminder_tidy.rds"))
# en table maken
CREATE TABLE flu_data (
Date VARCHAR(50),
country VARCHAR(50),
cases VARCHAR(50),
CONSTRAINT PK_flu PRIMARY KEY (Date,country)
);
# de date naar de table verplaatsen
COPY flu_data FROM 'C:/Users/Bas/Desktop/School/Programmeren/datascience/portfolio/data/flu_data_tidy.csv' WITH (FORMAT csv);
# de table laten zien
SELECT * FROM flu_data;
# en table maken
CREATE TABLE dengue_data (
Date VARCHAR(50),
country VARCHAR(50),
activity VARCHAR(50),
CONSTRAINT PK_dengue PRIMARY KEY (Date,country)
);
# de date naar de table verplaatsen
COPY dengue_data FROM 'C:/Users/Bas/Desktop/School/Programmeren/datascience/portfolio/data/dengue_data_tidy.csv' WITH (FORMAT csv);
# de table laten zien
SELECT * FROM dengue_data;
# connect to the database
con <- dbConnect(RPostgres::Postgres(),
dbname = "workflowsdb",
host="localhost",
port="5432",
user="postgres",
password="kaas")
# laat de tables zien
dbListTables(con)
## [1] "flu_data" "dengue_data" "gapminder"
# laat de colummen in flu_data zien
dbListFields(con, "flu_data")
## [1] "date" "country" "cases"
# laat het tabel flu_data zien
head(dbGetQuery(con, 'SELECT * FROM flu_data'))
## date country cases
## 1 Date country cases
## 2 2002-12-29 Argentina NA
## 3 2002-12-29 Australia NA
## 4 2002-12-29 Austria NA
## 5 2002-12-29 Belgium NA
## 6 2002-12-29 Bolivia NA
# disconnect van de database
dbDisconnect(con)
#create gapminder table
CREATE TABLE gapminder (
country VARCHAR(50),
Date VARCHAR(50),
infant_mortality VARCHAR(50) not null,
life_expectancy VARCHAR(50) not null,
fertitlity VARCHAR(50) not null,
population VARCHAR(50) not null,
gdp VARCHAR(50) not null,
continent VARCHAR(50),
region VARCHAR(50),
CONSTRAINT PK_gapminder PRIMARY KEY (Date,country)
);
#import the gampinder file
COPY gapminder FROM 'C:/Users/Bas/Desktop/School/Programmeren/datascience/portfolio/data/gapminder_tidy.csv' WITH (FORMAT csv);
# laat de tabel zien
SELECT * FROM gapminder;
# connect to the database
con <- dbConnect(RPostgres::Postgres(),
dbname = "workflowsdb",
host="localhost",
port="5432",
user="postgres",
password="kaas")
gapminder_flu <- dbGetQuery(con, 'select distinct *
from flu_data,gapminder
where flu_data.country = gapminder.country;'
)
# disconnect van de database
dbDisconnect(con)
# filter for the Netherlands and calculate the average cases over time
cases_netherlands <- gapminder_flu %>% filter(country == "Netherlands") %>% group_by(date=as.Date(date)) %>% summarise(mean_cases=mean(as.numeric(cases)))
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
## Warning in mean(as.numeric(cases)): NAs introduced by coercion
# make a gg line plot
netherlands_graph <- cases_netherlands %>%
ggplot(aes(x = date, y = mean_cases)) +
geom_line() +
labs(
title = "flu cases over time in the netherlands",
y = "cases"
)
ggplotly(netherlands_graph)
gapminder_flu$date <- as.Date(gapminder_flu$date)
country_cases <- gapminder_flu %>% filter(between(date, as.Date("2015-01-01"), as.Date("2015-12-30"))) %>% filter(continent == "Europe") %>% group_by(country) %>% summarise(mean_cases=mean(as.numeric(cases)))
graph <-country_cases %>%
ggplot(aes(x = country, y = mean_cases, fill = country_cases$country)) +
geom_bar(stat = "identity") +
theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
labs(
title = "Average flu cases per europian country in 2015"
)
ggplotly(graph)
## Warning: Use of `country_cases$country` is discouraged. Use `country` instead.